library(tidyverse)
library(gridExtra)
library(ggpubr)
# Restore the saved workspace objects; the rest of the script expects this to
# provide a data frame named `california`.
# NOTE(review): load() reads files written by save(); an ".Rds" extension
# usually signals saveRDS()/readRDS() output -- confirm how this file was made.
load("california.Rds")

# Every row of the data, and the subset of rows with ELECTED equal to 1.
candidates <- california
winners <- filter(california, ELECTED == 1)

# Career labels shown in the first (`rep`) and second (`dem`) row of the main
# figure. Note that `rep` shares its name with base::rep(); calls such as
# rep(x, n) still reach the function because R skips non-function bindings
# when it resolves a call.
rep <- c(
  "Business Employee",
  "Military or Law Enforcement",
  "Business Owner/Executive"
)
dem <- c(
  "Service Based Professional",
  "Lawyer",
  "Non-Profit Worker"
)



# Main approach ----
# Scaffold with one row for every (PLACE, label) combination, so that a label
# with no matching rows in a place can later be given an explicit share of 0.
# The number of repetitions is derived from the data instead of being
# hard-coded, so the scaffold stays valid if the set of labels changes.
place_ids <- unique(california$PLACE)
label_ids <- unique(california$label)

# base::rep is spelled out because this script also defines a character
# vector called `rep`.
graph_data <- data.frame(
  PLACE = base::rep(place_ids, length(label_ids))
) |>
  arrange(PLACE) |>
  mutate(label = base::rep(label_ids, length(place_ids)))



# Candidates ----
# For every (PLACE, label) pair in `graph_data`: the share of that place's
# candidates carrying that label (`prop`), 0 where the pair does not occur.
gd_cand <- merge(
  graph_data,
  candidates |>
    # rows per place and label, carrying dem_share / total_population along
    group_by(PLACE, dem_share, label, total_population) |>
    count() |>
    # total rows per place, used as the denominator of the share
    merge(
      candidates |>
        group_by(PLACE) |>
        summarise(n_tot = n()),
      by = "PLACE"
    ) |>
    mutate(prop = n / n_tot),
  # join keys spelled out (they are the only columns the two tables share)
  by = c("PLACE", "label"),
  all.x = TRUE
) |>
  # scaffold rows without a match have no candidates with that label
  mutate(prop = replace_na(prop, 0)) |>
  # unmatched scaffold rows also lack the place-level columns; copy them from
  # the other rows of the same place
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  # do not leak the grouping into later steps
  ungroup()


# Which tile (decile of dem_share) each place falls in, and the average
# proportion by label within each decile.
tiles_cand <- gd_cand |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Attach the decile by PLACE only. Letting merge() pick the keys would also
# join on the floating-point `dem_share` column (it exists in both tables),
# which silently drops any row whose dem_share differs from its place mean.
# When dem_share is constant within a place the result is unchanged.
avg_by_decile_cand <- merge(
  gd_cand,
  select(tiles_cand, PLACE, tiles),
  by = "PLACE"
) |>
  group_by(label, tiles) |>
  summarise(
    cat_mean = mean(prop),
    dem_share_mean = mean(dem_share),
    # return an ungrouped table and avoid the regrouping message
    .groups = "drop"
  )





# Winners ----
# For every (PLACE, label) pair in `graph_data`: the share of that place's
# winners carrying that label (`prop`), 0 where the pair does not occur.
gd_win <- merge(
  graph_data,
  winners |>
    # rows per place and label, carrying dem_share / total_population along
    group_by(PLACE, dem_share, label, total_population) |>
    count() |>
    # total rows per place, used as the denominator of the share
    merge(
      winners |>
        group_by(PLACE) |>
        summarise(n_tot = n()),
      by = "PLACE"
    ) |>
    mutate(prop = n / n_tot),
  # join keys spelled out (they are the only columns the two tables share)
  by = c("PLACE", "label"),
  all.x = TRUE
) |>
  # scaffold rows without a match have no winners with that label
  mutate(prop = replace_na(prop, 0)) |>
  # unmatched scaffold rows also lack the place-level columns; copy them from
  # the other rows of the same place (places with no winner rows stay NA)
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  # do not leak the grouping into later steps
  ungroup()


# Which tile (decile of dem_share) each place falls in, and the average
# proportion by label within each decile.
tiles_win <- gd_win |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Attach the decile by PLACE only. Letting merge() pick the keys would also
# join on the floating-point `dem_share` column (it exists in both tables),
# which silently drops any row whose dem_share differs from its place mean.
# When dem_share is constant within a place the result is unchanged.
avg_by_decile_win <- merge(
  gd_win,
  select(tiles_win, PLACE, tiles),
  by = "PLACE"
) |>
  group_by(label, tiles) |>
  summarise(
    cat_mean = mean(prop),
    dem_share_mean = mean(dem_share),
    # return an ungrouped table and avoid the regrouping message
    .groups = "drop"
  )



# Plots ----

# Scatter of per-place shares with the per-decile averages overlaid.
#
# All six panels of this script share this layout; only the inputs, titles,
# palette, y-axis limit and the optional reference lines differ.
#
# place_shares:   table with columns `dem_share`, `prop` and `label`
#                 (gd_cand or gd_win); drawn as faint points.
# decile_means:   table with columns `dem_share_mean`, `cat_mean` and `label`
#                 (avg_by_decile_cand or avg_by_decile_win); drawn as large
#                 points joined by a line.
# career_labels:  labels to show; rows with other labels are dropped from
#                 both tables.
# title, y_title: plot title and y-axis title.
# palette:        ColorBrewer palette name for scale_color_brewer().
# y_max:          upper limit of the y axis (the lower limit is 0).
# landmarks:      optional named numeric vector of x positions; each gets a
#                 dashed vertical line labelled with its name.
# Returns a ggplot object.
plot_career_shares <- function(place_shares, decile_means, career_labels,
                               title, y_title, palette,
                               y_max = 0.25, landmarks = NULL) {
  # .env$ makes sure the function argument is used even if a table happens to
  # have a column called `career_labels`.
  shown_points <- place_shares |> filter(label %in% .env$career_labels)
  shown_means <- decile_means |> filter(label %in% .env$career_labels)

  p <- ggplot(
    shown_points,
    aes(x = dem_share, y = prop, color = label, shape = label)
  ) +
    geom_point(alpha = 0.2) +
    geom_point(
      data = shown_means,
      mapping = aes(x = dem_share_mean, y = cat_mean, color = label),
      size = 4
    ) +
    geom_line(
      data = shown_means,
      mapping = aes(x = dem_share_mean, y = cat_mean, color = label)
    ) +
    theme_bw() +
    # The y limits are set once, here. A separate ylim() call would only be
    # replaced by this scale (with a "scale already present" message).
    scale_y_continuous(labels = scales::percent, limits = c(0, y_max)) +
    scale_x_continuous(labels = scales::percent) +
    labs(
      title = title,
      x = "Democratic Voteshare",
      y = y_title,
      color = "Category",
      shape = "Category"
    ) +
    scale_color_brewer(palette = palette) +
    theme(
      legend.title = element_text(size = 18),
      legend.text = element_text(size = 18),
      axis.text = element_text(size = 12, colour = "black"),
      axis.title = element_text(size = 18, colour = "black"),
      plot.title = element_text(size = 18, colour = "black"),
      panel.background = element_rect(fill = "white")
    )

  if (!is.null(landmarks)) {
    p <- p +
      geom_vline(xintercept = unname(landmarks), linetype = "dashed") +
      # annotate() draws each label exactly once. geom_text() with constant
      # x/y/label would inherit the plot data and redraw the same text once
      # per data row.
      annotate(
        "text",
        x = unname(landmarks),
        y = 0.22,
        label = names(landmarks),
        angle = 90,
        vjust = -0.7,
        colour = "black",
        size = 4
      )
  }

  p
}

# x positions of the dashed reference lines on the main-figure panels, named
# by the text printed next to each line.
landmarks <- c(Berkeley = 0.9591331, Bakersfield = 0.4543273)

# ggsave() below writes into this directory; make sure it exists.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

# Main figure, first row: labels listed in `rep`.
r_candidates_plot <- plot_career_shares(
  gd_cand, avg_by_decile_cand, rep,
  title = "All Candidates", y_title = "Share Candidates",
  palette = "Set1", landmarks = landmarks
)
r_winners_plot <- plot_career_shares(
  gd_win, avg_by_decile_win, rep,
  title = "Only Election Winners", y_title = "Share Winners",
  palette = "Set1", landmarks = landmarks
)
rep_careers <- ggarrange(
  r_candidates_plot, r_winners_plot,
  common.legend = TRUE, legend = "bottom"
)

rep_careers

# Main figure, second row: labels listed in `dem`.
d_candidates_plot <- plot_career_shares(
  gd_cand, avg_by_decile_cand, dem,
  title = "All Candidates", y_title = "Share Candidates",
  palette = "Set2", landmarks = landmarks
)
d_winners_plot <- plot_career_shares(
  gd_win, avg_by_decile_win, dem,
  title = "Only Election Winners", y_title = "Share Winners",
  palette = "Set2", landmarks = landmarks
)
dem_careers <- ggarrange(
  d_candidates_plot, d_winners_plot,
  common.legend = TRUE, legend = "bottom"
)

# Stack the two rows, display the result, and save exactly that object
# instead of relying on whatever last_plot() happens to hold.
main_figure <- ggarrange(rep_careers, dem_careers, nrow = 2)
main_figure

ggsave(
  filename = "figures/rawdata.png", plot = main_figure,
  height = 12, width = 12, bg = "white"
)

# Appendix display ----
# Every label that appears in neither `dem` nor `rep`.
all_labels <- unique(california$label)
app <- all_labels[!(all_labels %in% c(dem, rep))]

# Same layout with a taller y axis (up to 60%) and no reference lines.
a_candidates_plot <- plot_career_shares(
  gd_cand, avg_by_decile_cand, app,
  title = "All Candidates", y_title = "Share Candidates",
  palette = "Dark2", y_max = 0.60
)
a_winners_plot <- plot_career_shares(
  gd_win, avg_by_decile_win, app,
  title = "Winners", y_title = "Share Winners",
  palette = "Dark2", y_max = 0.60
)

app_careers <- ggarrange(
  a_candidates_plot, a_winners_plot,
  common.legend = TRUE, legend = "bottom"
)

app_careers

ggsave(
  filename = "figures/app_rawdata.png", plot = app_careers,
  height = 7, width = 12, bg = "white"
)






